Let's go through the intro machine learning tutorial: http://scikit-learn.org/stable/tutorial/basic/tutorial.html
We should have already installed sklearn; if not, use: pip install -U scikit-learn
Or follow the instructions at: http://scikit-learn.org/stable/install.html
In [1]:
from sklearn import datasets

# Load two of the sample datasets that come with sklearn
digits = datasets.load_digits()
iris = datasets.load_iris()
In [2]:
# Print the data array of the digits dataset (the features used for fitting)
print(digits.data)
In [3]:
# Show the target array of the digits dataset (the labels used for fitting)
digits.target
Out[3]:
In [4]:
# Show the first entry of digits.images
digits.images[0]
Out[4]:
In [5]:
# Predict which digit is being represented by an image
from sklearn import svm

# gamma is set manually here; good values can also be found automatically
# using other tools (e.g. a parameter search)
clf = svm.SVC(C=100., gamma=0.001)

# Fit on every sample except the last one, which is left out of training
clf.fit(digits.data[:-1], digits.target[:-1])
Out[5]:
In [9]:
from sklearn import datasets
import matplotlib.pyplot as plt
%matplotlib inline
#Load the digits dataset
digits = datasets.load_digits()
#Display the first digit
plt.figure(1, figsize=(3, 3))
plt.imshow(digits.images[-1], cmap=plt.cm.gray_r, interpolation='nearest')
plt.show()
# predict which digit this is
clf.predict(digits.data[-1:])
Out[9]:
Do you agree that this is an image of an 8? Did something go wrong? What could be done better?
In [11]:
from sklearn import datasets
from sklearn import svm

# Iris data and targets, bound to the conventional X / y names
iris = datasets.load_iris()
X = iris.data
y = iris.target

# Fit a support vector classifier created with default parameters
clf = svm.SVC()
clf.fit(X, y)
Out[11]:
In [13]:
import pickle

# Serialize the classifier to an in-memory string, then rebuild a copy from it.
# NOTE: only unpickle data you trust -- pickle.loads can execute arbitrary code.
s = pickle.dumps(clf)
clf2 = pickle.loads(s)

# Use the restored copy to predict the first sample
clf2.predict(X[:1])
Out[13]:
In [18]:
# sklearn.externals.joblib was deprecated in scikit-learn 0.21 and removed in
# 0.23; prefer the standalone joblib package and fall back for old installs.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

# Persist the classifier to disk; dump() gives back the list of files written
joblib.dump(clf, 'filename.pkl')
Out[18]:
In [20]:
# Reload the classifier that was dumped to 'filename.pkl'
# (only load files you trust: joblib files are pickle-based)
clf = joblib.load('filename.pkl')
In [23]:
import numpy as np
from sklearn import random_projection
Out[23]:
In [27]:
# Build a float32 input and compare its dtype with that of the projected output
# (the original note here: the default type is float64 unless specified)
rng = np.random.RandomState(0)
X = np.array(rng.rand(10, 2000), dtype='float32')

transformer = random_projection.GaussianRandomProjection()
X_new = transformer.fit_transform(X)

print("Data types for X = " + str(X.dtype) + " and X_new = " + str(X_new.dtype))
In [28]:
from sklearn.svm import SVC
from sklearn import datasets

iris = datasets.load_iris()

# Train a default SVC using iris.target as the labels
clf = SVC()
clf.fit(iris.data, iris.target)
Out[28]:
In [29]:
# Predictions for the first three iris samples, shown as a plain list
list(clf.predict(iris.data[:3]))
Out[29]:
In [30]:
# Refit the same classifier, this time using the class names
# (target_names indexed by target) as the labels
clf.fit(iris.data, iris.target_names[iris.target])
Out[30]:
In [31]:
# Predict the first three samples again with the refitted classifier
list(clf.predict(iris.data[:3]))
Out[31]:
In [ ]:
# refitting and updating parameters
In [35]:
import numpy as np
from sklearn.svm import SVC

# Seeded generator so the random training and test data are reproducible
rng = np.random.RandomState(0)
X = rng.rand(100, 10)
y = rng.binomial(1, 0.5, 100)
X_test = rng.rand(5, 10)

clf = SVC()

# Set a linear kernel, fit, and predict
clf.set_params(kernel='linear')
clf.fit(X, y)
print("Predict x_test: " + str(clf.predict(X_test)))

# Change the kernel to rbf on the same estimator, refit, and predict again
clf.set_params(kernel='rbf')
clf.fit(X, y)
print("Predict x_test after refitting: " + str(clf.predict(X_test)))
In [ ]: